/* Customised HYPHY script for coevolution detection*/
/* Given an alignment of DNA sequences, the script performs model selection between single nucleotide and paired-nucleotide substitution models for all combinations of pairs of sites.*/

function computeScalingFactorB(rateMatrix, baseFreqs)
{
	B = 0;
	for (n1 = 0; n1 < Rows(rateMatrix); n1 = n1+1)
	{
		for (n2 = 0; n2 < Columns(rateMatrix); n2 = n2+1)
		{
			if (n2 != n1)
			{
				B = B + baseFreqs[n1]*baseFreqs[n2]*rateMatrix[n1][n2];
			}
		}
	}
	return B;
}



/* build the paired nucleotide model */

_PairingsVector = {{
0, /* AA - mispairing */
0, /* AC - mispairing */
0, /* AG - mispairing */
1, /* AU - Watson-Crick */

0, /* CA - mispairing */
0, /* CC - mispairing */
1, /* CG - Watson-Crick */
0, /* CU - mispairing */

0, /* GA - mispairing */
1, /* GC - Watson-Crick */
0, /* GG - mispairing */
1, /* GU - mispairing */
 
1, /* UA - Watson-Crick */
0, /* UC - mispairing */
1, /* UG - mispairing */
0  /* UU - mispairing */
}};

function BuildDinucFrequencies (result&,obsF)
{
	result = {16,1};
	h = obsF[0]*obsF[3]+obsF[1]*obsF[2];
	global freq_Mod := 1/(1+2*(h__*(R_gained/R_lost-1)));
	for (h=0; h<4; h=h+1)
	{
		for (v=0; v<4; v=v+1)
		{
			idx = h*4+v;
			if (_PairingsVector[idx])
			{
				result[idx] := freq_Mod*R_gained/R_lost*obsF__[h__]*obsF__[v__];
			}
			else
			{
				result[idx] := freq_Mod*obsF__[h__]*obsF__[v__];			
			}
		}
	}
	return result;
}

/*----------------------------------------------------------------------------------------------------*/

_nucModMatrix = {{"","R_TVTS*","","R_TVTS*"}
				 {"R_TVTS*","","R_TVTS*",""}
				 {"","R_TVTS*","","R_TVTS*"}
				 {"R_TVTS*","","R_TVTS*",""}};

/*----------------------------------------------------------------------------------------------------*/

modelType = 0;

/* #include "modelParameters2.mdl"; */

if (dinucModelType < 0)
{
	return 0;
}

/*
if (modelType == 1)
{
	#include "defineGamma.mdl";
}
if (modelType == 2)
{
	#include "defineHM.mdl";
}
*/

function PopulateModelMatrix (ModelMatrixName&, EFV)
{
	global R_TVTS   = 1;
	global R_gained = 1;
	global R_lost  := 1/R_gained;
		   
	ModelMatrixName = {16,16};
	
	for (h=0; h<16; h=h+1)
	{
		for (v=h+1; v<16; v=v+1)
		{
			if (v!=h)
			{
				fromNuc = -1;
				toNuc   = -1;
				if (h$4 == v$4) /* same 1st nuc */
				{
					toNuc   = v%4;
					fromNuc = h%4;
				}
				else
				{
					if (v%4 == h%4) /* same 2nd nuc */
					{
						toNuc   = v$4;
						fromNuc = h$4;
					}
				}
				if (fromNuc >= 0)
				{
					rateMult  = "";
					rateMult2 = "";
					if (_PairingsVector[h]<_PairingsVector[v])
					{
						rateMult  = "R_gained*";
						rateMult2 = "R_lost*";
					}
					else
					{
						if (_PairingsVector[h]>_PairingsVector[v])
						{
							rateMult   = "R_lost*";
							rateMult2  = "R_gained*";
						}
					}
					rateMult  = _nucModMatrix[fromNuc][toNuc]+rateMult;
					rateMult2 = _nucModMatrix[fromNuc][toNuc]+rateMult2;
					if (modelType >= 1)
					{
						rateMult  = rateMult  + "c*";
						rateMult2 = rateMult2 + "c*";
					}
					
					
					ExecuteCommands ("ModelMatrixName["+h+"]["+v+"]:="+rateMult  +"t*EFV__["+toNuc+"];");
					ExecuteCommands ("ModelMatrixName["+v+"]["+h+"]:="+rateMult2 +"t*EFV__["+fromNuc+"];");
				}
			}
		}
	}
	
	return 0;
}

/*
HarvestFrequencies (pairFreq, pairSites, 1, 1, 1);
*/




SetDialogPrompt ("Please specify nucleotide alignment file:");

DataSet myDS = ReadDataFile (PROMPT_FOR_FILE);

DataSetFilter allSites = CreateFilter (myDS, 1);

HarvestFrequencies (obsFreqs, allSites, 1, 1, 1);



SetDialogPrompt ("Please select a tree file:");
fscanf (PROMPT_FOR_FILE, "Raw", treeString);




/* ----------------------------------------- */

fprintf (stdout, "Enter the leftmost site to test (0-index, max val = ", allSites.sites, "): ");
fscanf (stdin, "Number", left);

fprintf (stdout, "Enter the rightmost site to test (0-index, max val = ", allSites.sites, "): ");
fscanf (stdin, "Number", right);

fprintf (stdout, "Enter the distance to test:");
fscanf (stdin, "Number", distance);

fscanf (stdin, "String", outfile);

fscanf (stdin, "Number", offset); /* the base offset - used to calculated which position in the full alignment we are looking at */
fscanf (stdin, "String", invariantString); /* string of 1's and 0's specifying which sites are invariant and can be ignored. */



fprintf (outfile, CLEAR_FILE, KEEP_OPEN,"fsite1\tfsite2\tpsite1\tpsite2\td(Log L)\tP-value\tscalingB\tR_gained\tR_TVTS\tR_lost\tfreq_Mod\n");
fprintf (outfile, "------------------------------------\n");

for (_i = left ; _i < right; _i = _i + 1)
{
	for (_j = _i+1 ; _j <= _i + distance ; _j = _j + 1)
	{	
		if(invariantString[_i][_i] == "0" && invariantString[_j][_j] == "0") /* compute only if both sites are non-invariant */ 
		{
			ExecuteCommands("DataSetFilter twoSites = CreateFilter (myDS, 1, \""+ _i+","+_j+"\");");
			
			global S = 1.0;
			
			HKY85RateMatrix = {	{*,a,S*a,a}
								{a,*,a,S*a}
								{S*a,a,*,a}
								{a,S*a,a,*}};
			
			Model HKY85 = (HKY85RateMatrix, obsFreqs);
			
			ACCEPT_BRANCH_LENGTHS = 1;
			fprintf (stdout, "B\n");
			Tree myTree = treeString;
			/* constrain branch lengths by scaling factor */
			fprintf (stdout, "C\n");
			global scalingB  = computeScalingFactorB (HKY85Matrix, obsFreqs);
			scalingB :< 1E8;
			
			branchNames 	= BranchName (myTree, -1);
			branchLengths	= BranchLength (myTree, -1);
			
			for (k = 0; k < Columns(branchNames)-1; k=k+1)
			{
				ExecuteCommands("myTree." + branchNames[k] + ".a:=" + branchLengths[k] + "/scalingB;");
			}
			
			
			/* fit independent sites model */
			LikelihoodFunction myLF = (twoSites, myTree);
			Optimize (res, myLF);
			
			
			/* re-read sites as paired character states */
			/* DataSetFilter pairSites = CreateFilter (myDS, 2, "52,56"); */
			ExecuteCommands("DataSetFilter pairSites = CreateFilter (myDS, 2, \"" + _i + ","+ _j+ "\");");
			
			MULTIPLY_BY_FREQS = PopulateModelMatrix ("M95", obsFreqs);
			BuildDinucFrequencies ("pairFreqs", obsFreqs);
			Model M95Model = (M95, pairFreqs, MULTIPLY_BY_FREQS);
			FREQUENCY_SENSITIVE = 1;
			
			ACCEPT_BRANCH_LENGTHS = 1;
			Tree pairTree = treeString;
			
			
			global scalingB = computeScalingFactorB (M95, pairFreqs);
			scalingB :< 1E8;
			
			branchNames 	= BranchName (pairTree, -1);
			branchLengths	= BranchLength (pairTree, -1);
			
			for (k = 0; k < Columns(branchNames)-1; k=k+1)
			{
				ExecuteCommands("pairTree." + branchNames[k] + ".t:=" + branchLengths[k] + "/scalingB;");
			}
			
			
			/* fit independent sites model */
			LikelihoodFunction pairLF = (pairSites, pairTree);
			
			Optimize (pairRes, pairLF);
			
			/*
			fprintf (stdout, res[1][0], " ", res[1][1], " ", res[1][2], "\n");
			fprintf (stdout, pairRes[1][0], " ", pairRes[1][1], " ", pairRes[1][2], "\n");
			*/
			
			dlogl = pairRes[1][0] - res[1][0];
			if (dlogl > 0)
			{
				pval = 1.0-CChi2(2*dlogl, 1);
			}
			else
			{
				pval = "NA";
			}

			fprintf (outfile, (_i+offset), "\t\t", (_j+offset), "\t\t", _i, "\t\t", _j, "\t\t", dlogl, "\t\t", pval, "\t", pairRes[0][0], "\t", pairRes[0][1], "\t", pairRes[0][2], "\t", pairRes[0][3], "\t", pairRes[0][4], "\n");
		}
		else
		{
			fprintf (outfile, (_i+offset), "\t\t", (_j+offset), "\t\t", _i, "\t\t", _j, "\t\tI\t\tI\tI\tI\tI\tI\tI", "\n");
		}
	}
}

fprintf (outfile, CLOSE_FILE);



